{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Recurrent Neural Network in Keras"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this notebook, we use an RNN to classify IMDB movie reviews by their sentiment."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import keras\n",
    "from keras.datasets import imdb\n",
    "from keras.preprocessing.sequence import pad_sequences\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Dropout, Embedding, SpatialDropout1D\n",
    "from keras.layers import SimpleRNN # new! \n",
    "from keras.callbacks import ModelCheckpoint\n",
    "import os\n",
    "from sklearn.metrics import roc_auc_score \n",
    "import matplotlib.pyplot as plt \n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Set hyperparameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# output directory name:\n",
    "output_dir = 'model_output/rnn'\n",
    "\n",
    "# training:\n",
    "epochs = 16 # way more!\n",
    "batch_size = 128\n",
    "\n",
    "# vector-space embedding: \n",
    "n_dim = 64 \n",
    "n_unique_words = 10000 \n",
    "max_review_length = 100 # lowered due to vanishing gradient over time\n",
    "pad_type = trunc_type = 'pre'\n",
    "drop_embed = 0.2 \n",
    "\n",
    "# RNN layer architecture:\n",
    "n_rnn = 256 \n",
    "drop_rnn = 0.2\n",
    "\n",
    "# dense layer architecture: \n",
    "# n_dense = 256\n",
    "# dropout = 0.2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "(x_train, y_train), (x_valid, y_valid) = imdb.load_data(num_words=n_unique_words) # removed n_words_to_skip"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Preprocess data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Pad or truncate every review to max_review_length tokens, filling with 0;\n",
    "# the training and validation splits share exactly the same settings.\n",
    "pad_kwargs = dict(maxlen=max_review_length, padding=pad_type, truncating=trunc_type, value=0)\n",
    "x_train = pad_sequences(x_train, **pad_kwargs)\n",
    "x_valid = pad_sequences(x_valid, **pad_kwargs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "#### Design neural network architecture"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "model.add(Embedding(n_unique_words, n_dim, input_length=max_review_length)) \n",
    "model.add(SpatialDropout1D(drop_embed))\n",
    "model.add(SimpleRNN(n_rnn, dropout=drop_rnn))\n",
    "# model.add(Dense(n_dense, activation='relu')) # a dense hidden layer on top is typically omitted in NLP models\n",
    "# model.add(Dropout(dropout))\n",
    "model.add(Dense(1, activation='sigmoid'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding_1 (Embedding)      (None, 100, 64)           640000    \n",
      "_________________________________________________________________\n",
      "spatial_dropout1d_1 (Spatial (None, 100, 64)           0         \n",
      "_________________________________________________________________\n",
      "simple_rnn_1 (SimpleRNN)     (None, 256)               82176     \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 1)                 257       \n",
      "=================================================================\n",
      "Total params: 722,433\n",
      "Trainable params: 722,433\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "model.summary() "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Configure model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "modelcheckpoint = ModelCheckpoint(filepath=output_dir+\"/weights.{epoch:02d}.hdf5\")\n",
    "if not os.path.exists(output_dir):\n",
    "    os.makedirs(output_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Train!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 25000 samples, validate on 25000 samples\n",
      "Epoch 1/16\n",
      "25000/25000 [==============================] - 20s - loss: 0.7038 - acc: 0.5072 - val_loss: 0.6928 - val_acc: 0.5001\n",
      "Epoch 2/16\n",
      "25000/25000 [==============================] - 8s - loss: 0.6901 - acc: 0.5322 - val_loss: 0.6765 - val_acc: 0.5400\n",
      "Epoch 3/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.5885 - acc: 0.6779 - val_loss: 0.5080 - val_acc: 0.7551\n",
      "Epoch 4/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.4547 - acc: 0.7914 - val_loss: 0.4558 - val_acc: 0.8060\n",
      "Epoch 5/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.4667 - acc: 0.7790 - val_loss: 0.4924 - val_acc: 0.7648\n",
      "Epoch 6/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.5061 - acc: 0.7640 - val_loss: 0.4924 - val_acc: 0.7736\n",
      "Epoch 7/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.5080 - acc: 0.7518 - val_loss: 0.5515 - val_acc: 0.7265\n",
      "Epoch 8/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.4262 - acc: 0.8172 - val_loss: 0.5185 - val_acc: 0.7606\n",
      "Epoch 9/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.3753 - acc: 0.8446 - val_loss: 0.4954 - val_acc: 0.7825\n",
      "Epoch 10/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.3802 - acc: 0.8383 - val_loss: 0.5386 - val_acc: 0.7744\n",
      "Epoch 11/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.4068 - acc: 0.8213 - val_loss: 0.6066 - val_acc: 0.6564\n",
      "Epoch 12/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.4169 - acc: 0.8271 - val_loss: 0.5142 - val_acc: 0.7754\n",
      "Epoch 13/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.3484 - acc: 0.8612 - val_loss: 0.5151 - val_acc: 0.7694\n",
      "Epoch 14/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.3471 - acc: 0.8592 - val_loss: 0.6975 - val_acc: 0.6359\n",
      "Epoch 15/16\n",
      "25000/25000 [==============================] - 8s - loss: 0.3922 - acc: 0.8371 - val_loss: 0.5644 - val_acc: 0.7728\n",
      "Epoch 16/16\n",
      "25000/25000 [==============================] - 7s - loss: 0.3025 - acc: 0.8865 - val_loss: 0.5431 - val_acc: 0.7818\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fe63b96b6d8>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 80.6% validation accuracy in epoch 4\n",
    "model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_valid, y_valid), callbacks=[modelcheckpoint])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "#### Evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model.load_weights(output_dir+\"/weights.03.hdf5\") # zero-indexed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "24992/25000 [============================>.] - ETA: 0s"
     ]
    }
   ],
   "source": [
    "y_hat = model.predict_proba(x_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFUZJREFUeJzt3X+QXeV93/H3x8jYsWNb/FgYKskVHiuusWdskx2M65nU\nsVwQkLH4Axp5mlhh1KqT4vxqpq1o/1ALdgb3Fy0zMYka1AhPYpnQOGgMDVUFHredglmMgw2E0RoT\n2IqiDRJyU8Z2RL794z7CF7yrvXe1e6/X5/2a2TnnfM9z7nkeVuznnh/33FQVkqTuec24OyBJGg8D\nQJI6ygCQpI4yACSpowwASeooA0CSOsoAkKSOMgAkqaMMAEnqqFXj7sDJnH322bV+/fpxd0P6Qd9+\nojd98zvG2w9pDg899NCfV9XEQu0GCoAkvwb8PaCArwPXAOcBe4Ezga8CP19V30vyOuA24CeB54Gf\nraqn2utcB2wDXgJ+uaruOdl+169fz9TU1CBdlEbrv32oN/3Il8bZC2lOSf5skHYLngJKsgb4ZWCy\nqt4NnAZsAT4N3FRVG4Cj9P6w06ZHq+rtwE2tHUkuaNu9C9gEfCbJacMMSpK0dAa9BrAK+LEkq4A3\nAM8CHwbuaOv3AFe2+c1tmbZ+Y5K0+t6q+m5VfQuYBi469SFIkhZjwQCoqv8N/BvgaXp/+I8BDwEv\nVNXx1mwGWNPm1wDPtG2Pt/Zn9dfn2EaSNGKDnAI6g9679/OBvwa8EbhsjqYnniudedbNV3/1/rYn\nmUoyNTs7u1D3JEmLNMgpoI8A36qq2ar6S+APgb8JrG6nhADWAofa/AywDqCtfwtwpL8+xzYvq6pd\nVTVZVZMTEwtexJYkLdIgAfA0cHGSN7Rz+RuBx4D7gKtam63AnW1+X1umrb+3et86sw/YkuR1Sc4H\nNgBfWZphSJKGteBtoFX1QJI76N3qeRx4GNgF3AXsTfLJVru1bXIr8Nkk0/Te+W9pr/Noktvphcdx\n4NqqemmJxyNJGlB+mL8ScnJysvwcgH4o+TkA/RBL8lBVTS7UzkdBSFJH/VA/CkKSxmn9jrvGtu+n\nbrxi2ffhEYAkdZQBIEkdZQBIUkcZAJLUUQaAJHWUASBJHWUASFJHGQCS1FEGgCR1lAEgSR1lAEhS\nRxkAktRRBoAkdZQBIEkdZQBIUkcZAJLUUQsGQJJ3JPla38+3k/xqkjOT7E9ysE3PaO2T5OYk00ke\nSXJh32ttbe0PJtk6/14lScttwQCoqieq6r1V9V7gJ4EXgS8AO4ADVbUBONCWAS4DNrSf7cAtAEnO\nBHYC7wcuAnaeCA1J0ugNewpoI/DNqvozYDOwp9X3AFe2+c3AbdVzP7A6yXnApcD+qjpSVUeB/cCm\nUx6BJGlRhg2ALcDn2vy5VfUsQJue0+prgGf6tplptfnqr5Bke5KpJFOzs7NDdk+SNKiBAyDJ6cBH\ngT9YqOkctTpJ/ZWFql1VNVlVkxMTE4N2T5I0pGGOAC4DvlpVz7Xl59qpHdr0cKvPAOv6tlsLHDpJ\nXZI0BsMEwMf4/ukfgH3AiTt5tgJ39tU/3u4Guhg41k4R3QNckuSMdvH3klaTJI3BqkEaJXkD8LeB\nf9BXvhG4Pck24Gng6la/G7gcmKZ3x9A1AFV1JMkNwIOt3fVVdeSURyBJWpSBAqCqXgTOelXteXp3\nBb26bQHXzvM6u4Hdw3dTkrTU/CSwJHWUASBJHWUASFJHGQCS1FEGgCR1lAEgSR1lAEhSRxkAktRR\nBoAkdZQBIEkdZQBIUkcZAJLUUQaAJHWUASBJHWUASFJHGQCS1FEGgCR11EABkGR1kjuS/GmSx5N8\nIMmZSfYnOdimZ7S2SXJzkukkjyS5sO91trb2B5NsnX+PkqTlNugRwH8A/riq/gbwHuBxYAdwoKo2\nAAfaMsBlwIb2sx24BSDJmcBO4P3ARc
DOE6EhSRq9BQMgyZuBnwJuBaiq71XVC8BmYE9rtge4ss1v\nBm6rnvuB1UnOAy4F9lfVkao6CuwHNi3paCRJAxvkCOBtwCzwn5I8nOR3krwROLeqngVo03Na+zXA\nM33bz7TafHVJ0hgMEgCrgAuBW6rqfcD/4/une+aSOWp1kvorN062J5lKMjU7OztA9yRJizFIAMwA\nM1X1QFu+g14gPNdO7dCmh/var+vbfi1w6CT1V6iqXVU1WVWTExMTw4xFkjSEVQs1qKr/k+SZJO+o\nqieAjcBj7WcrcGOb3tk22Qd8Isleehd8j1XVs0nuAX6j78LvJcB1SzucV1q/467lfPl5PXXjFWPZ\nryQNY8EAaH4J+L0kpwNPAtfQO3q4Pck24Gng6tb2buByYBp4sbWlqo4kuQF4sLW7vqqOLMkoJElD\nGygAquprwOQcqzbO0baAa+d5nd3A7mE6KElaHn4SWJI6ygCQpI4yACSpowwASeooA0CSOsoAkKSO\nMgAkqaMMAEnqKANAkjrKAJCkjjIAJKmjDABJ6igDQJI6ygCQpI4yACSpowwASeooA0CSOsoAkKSO\nGigAkjyV5OtJvpZkqtXOTLI/ycE2PaPVk+TmJNNJHklyYd/rbG3tDybZujxDkiQNYpgjgJ+uqvdW\n1YnvBt4BHKiqDcCBtgxwGbCh/WwHboFeYAA7gfcDFwE7T4SGJGn0TuUU0GZgT5vfA1zZV7+teu4H\nVic5D7gU2F9VR6rqKLAf2HQK+5cknYJBA6CA/5rkoSTbW+3cqnoWoE3PafU1wDN928602nx1SdIY\nrBqw3Qer6lCSc4D9Sf70JG0zR61OUn/lxr2A2Q7w1re+dcDuSZKGNdARQFUdatPDwBfoncN/rp3a\noU0Pt+YzwLq+zdcCh05Sf/W+dlXVZFVNTkxMDDcaSdLAFgyAJG9M8qYT88AlwDeAfcCJO3m2Ane2\n+X3Ax9vdQBcDx9oponuAS5Kc0S7+XtJqkqQxGOQU0LnAF5KcaP/7VfXHSR4Ebk+yDXgauLq1vxu4\nHJgGXgSuAaiqI0luAB5s7a6vqiNLNhJJ0lAWDICqehJ4zxz154GNc9QLuHae19oN7B6+m5KkpeYn\ngSWpowwASeooA0CSOsoAkKSOMgAkqaMMAEnqKANAkjrKAJCkjjIAJKmjDABJ6igDQJI6ygCQpI4y\nACSpowwASeooA0CSOsoAkKSOMgAkqaMMAEnqqIEDIMlpSR5O8sW2fH6SB5IcTPL5JKe3+uva8nRb\nv77vNa5r9SeSXLrUg5EkDW6YI4BfAR7vW/40cFNVbQCOAttafRtwtKreDtzU2pHkAmAL8C5gE/CZ\nJKedWvclSYs1UAAkWQtcAfxOWw7wYeCO1mQPcGWb39yWaes3tvabgb1V9d2q+hYwDVy0FIOQJA1v\n0COAfw/8E+Cv2vJZwAtVdbwtzwBr2vwa4BmAtv5Ya/9yfY5tXpZke5KpJFOzs7NDDEWSNIwFAyDJ\nzwCHq+qh/vIcTWuBdSfb5vuFql1VNVlVkxMTEwt1T5K0SKsGaPNB4KNJLgdeD7yZ3hHB6iSr2rv8\ntcCh1n4GWAfMJFkFvAU40lc/oX8bSdKILXgEUFXXVdXaqlpP7yLuvVX1d4H7gKtas63AnW1+X1um\nrb+3qqrVt7S7hM4HNgBfWbKRSJKGMsgRwHz+KbA3ySeBh4FbW/1W4LNJpum9898CUFWPJrkdeAw4\nDlxbVS+dwv4lSadgqACoqi8BX2rzTzLHXTxV9R3g6nm2/xTwqWE7udKs33HXWPb71I1XjGW/klYm\nPwksSR1lAEhSRxkAktRRBoAkdZQBIEkdZQBIUkcZAJLUUQaAJHWUASBJHWUASFJHGQCS1FEGgCR1\nlAEgSR1lAEhSRxkAktRRBoAkdZQBIEkdtWAAJHl9kq8k+ZMkjyb5l61+fpIHkhxM8vkkp7f669ry\ndF
u/vu+1rmv1J5JculyDkiQtbJAjgO8CH66q9wDvBTYluRj4NHBTVW0AjgLbWvttwNGqejtwU2tH\nkgvofT/wu4BNwGeSnLaUg5EkDW7BAKiev2iLr20/BXwYuKPV9wBXtvnNbZm2fmOStPreqvpuVX0L\nmGaO7xSWJI3GQNcAkpyW5GvAYWA/8E3ghao63prMAGva/BrgGYC2/hhwVn99jm0kSSM2UABU1UtV\n9V5gLb137e+cq1mbZp5189VfIcn2JFNJpmZnZwfpniRpEYa6C6iqXgC+BFwMrE6yqq1aCxxq8zPA\nOoC2/i3Akf76HNv072NXVU1W1eTExMQw3ZMkDWGQu4Amkqxu8z8GfAR4HLgPuKo12wrc2eb3tWXa\n+nurqlp9S7tL6HxgA/CVpRqIJGk4qxZuwnnAnnbHzmuA26vqi0keA/Ym+STwMHBra38r8Nkk0/Te\n+W8BqKpHk9wOPAYcB66tqpeWdjiSpEEtGABV9QjwvjnqTzLHXTxV9R3g6nle61PAp4bvpiRpqflJ\nYEnqKANAkjrKAJCkjjIAJKmjDABJ6igDQJI6ygCQpI4yACSpowwASeooA0CSOsoAkKSOMgAkqaMG\neRqoJI3V+h13jbsLP5I8ApCkjjIAJKmjDABJ6igDQJI6ygCQpI4a5Evh1yW5L8njSR5N8iutfmaS\n/UkOtukZrZ4kNyeZTvJIkgv7Xmtra38wydb59ilJWn6DHAEcB369qt4JXAxcm+QCYAdwoKo2AAfa\nMsBlwIb2sx24BXqBAewE3k/vu4R3nggNSdLoLRgAVfVsVX21zf9f4HFgDbAZ2NOa7QGubPObgduq\n535gdZLzgEuB/VV1pKqOAvuBTUs6GknSwIa6BpBkPfA+4AHg3Kp6FnohAZzTmq0BnunbbKbV5qtL\nksZg4ABI8uPAfwZ+taq+fbKmc9TqJPVX72d7kqkkU7Ozs4N2T5I0pIECIMlr6f3x/72q+sNWfq6d\n2qFND7f6DLCub/O1wKGT1F+hqnZV1WRVTU5MTAwzFknSEAa5CyjArcDjVfXv+lbtA07cybMVuLOv\n/vF2N9DFwLF2iuge4JIkZ7SLv5e0miRpDAZ5GNwHgZ8Hvp7ka632z4AbgduTbAOeBq5u6+4GLgem\ngReBawCq6kiSG4AHW7vrq+rIkoxCkjS0BQOgqv4Hc5+/B9g4R/sCrp3ntXYDu4fpoCRpefhJYEnq\nKANAkjrKAJCkjjIAJKmjDABJ6igDQJI6ygCQpI4yACSpowb5JLBWiPU77hrbvp+68Yqx7VvS4ngE\nIEkdZQBIUkcZAJLUUQaAJHWUASBJHWUASFJHGQCS1FEGgCR11CDfCbw7yeEk3+irnZlkf5KDbXpG\nqyfJzUmmkzyS5MK+bba29geTbJ1rX5Kk0RnkCOB3gU2vqu0ADlTVBuBAWwa4DNjQfrYDt0AvMICd\nwPuBi4CdJ0JDkjQeCwZAVX0ZePWXt28G9rT5PcCVffXbqud+YHWS84BLgf1VdaSqjgL7+cFQkSSN\n0GKvAZxbVc8CtOk5rb4GeKav3UyrzVeXJI3JUl8Ezhy1Okn9B18g2Z5kKsnU7OzsknZOkvR9iw2A\n59qpHdr0cKvPAOv62q0FDp2k/gOqaldVTVbV5MTExCK7J0layGIfB70P2Arc2KZ39tU/kWQvvQu+\nx6rq2ST3AL/Rd+H3EuC6xXdb0jiM85HjWnoLBkCSzwEfAs5OMkPvbp4bgduTbAOeBq5uze8GLgem\ngReBawCq6kiSG4AHW7vrq+rVF5YlSSO0YABU1cfmWbVxjrYFXDvP6+wGdg/VO60Y43pn6BfRSIvn\nJ4ElqaMMAEnqKANAkjrKAJCkjlrsbaCSxsRbMbVUDACtaOP6Y7j3bc9z8dvOGsu+paXiKSBJ6iiP\nAKRFuv/J59ni6RitYB4BSFJHGQCS1FEGgCR1lAEgSR1lAEhSRxkA
ktRRBoAkdZQBIEkdZQBIUkcZ\nAJLUUSMPgCSbkjyRZDrJjlHvX5LUM9IASHIa8JvAZcAFwMeSXDDKPkiSekZ9BHARMF1VT1bV94C9\nwOYR90GSxOgDYA3wTN/yTKtJkkZs1I+Dzhy1ekWDZDuwvS3+RZInhtzH2cCfL6JvK53jHqEPvDz3\nM6Pe9Qld/H13asz59Muzixn3Xx+k0agDYAZY17e8FjjU36CqdgG7FruDJFNVNbnY7Vcqx90tXRx3\nF8cMyzvuUZ8CehDYkOT8JKcDW4B9I+6DJIkRHwFU1fEknwDuAU4DdlfVo6PsgySpZ+RfCVlVdwN3\nL+MuFn36aIVz3N3SxXF3ccywjONOVS3cSpL0I8dHQUhSR63YAFjokRJJXpfk8239A0nWj76XS2+A\ncf+jJI8leSTJgSQD3Q72w27QR4gkuSpJJVnxd4sMMuYkf6f9vh9N8vuj7uNyGODf+FuT3Jfk4fbv\n/PJx9HMpJdmd5HCSb8yzPklubv9NHkly4ZLsuKpW3A+9C8jfBN4GnA78CXDBq9r8Q+C32vwW4PPj\n7veIxv3TwBva/C92Zdyt3ZuALwP3A5Pj7vcIftcbgIeBM9ryOePu94jGvQv4xTZ/AfDUuPu9BOP+\nKeBC4BvzrL8c+C/0Pkt1MfDAUux3pR4BDPJIic3AnjZ/B7AxyVwfRFtJFhx3Vd1XVS+2xfvpfdZi\npRv0ESI3AP8K+M4oO7dMBhnz3wd+s6qOAlTV4RH3cTkMMu4C3tzm38KrPku0ElXVl4EjJ2myGbit\neu4HVic571T3u1IDYJBHSrzcpqqOA8eAs0bSu+Uz7KM0ttF717DSLTjuJO8D1lXVF0fZsWU0yO/6\nJ4CfSPI/k9yfZNPIerd8Bhn3vwB+LskMvTsKf2k0XRurZXmMzshvA10iCz5SYsA2K83AY0ryc8Ak\n8LeWtUejcdJxJ3kNcBPwC6Pq0AgM8rteRe800IfoHen99yTvrqoXlrlvy2mQcX8M+N2q+rdJPgB8\nto37r5a/e2OzLH/PVuoRwIKPlOhvk2QVvUPFkx1irQSDjJskHwH+OfDRqvruiPq2nBYa95uAdwNf\nSvIUvXOk+1b4heBB/43fWVV/WVXfAp6gFwgr2SDj3gbcDlBV/wt4Pb3n5fwoG+j//WGt1AAY5JES\n+4Ctbf4q4N5qV1NWsAXH3U6F/Da9P/4/CueEYYFxV9Wxqjq7qtZX1Xp61z4+WlVT4+nukhjk3/gf\n0bvoT5Kz6Z0SenKkvVx6g4z7aWAjQJJ30guA2ZH2cvT2AR9vdwNdDByrqmdP9UVX5CmgmueREkmu\nB6aqah9wK71Dw2l67/y3jK/HS2PAcf9r4MeBP2jXvJ+uqo+OrdNLYMBx/0gZcMz3AJckeQx4CfjH\nVfX8+Hp96gYc968D/zHJr9E7DfILK/3NXZLP0TuVd3a7trETeC1AVf0WvWsdlwPTwIvANUuy3xX+\n302StEgr9RSQJOkUGQCS1FEGgCR1lAEgSR1lAEhSRxkAktRRBoAkdZQBIEkd9f8BD9hpONTMnCQA\nAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7fe63b6c5320>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(y_hat)\n",
    "_ = plt.axvline(x=0.5, color='orange')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'87.84'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"{:0.2f}\".format(roc_auc_score(y_valid, y_hat)*100.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
